ncbi https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE147507
sra run selector https://www.ncbi.nlm.nih.gov/Traces/study/?acc=PRJNA615032&o=acc_s%3Aa
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(biomaRt)
## Warning: package 'biomaRt' was built under R version 4.0.3
library(edgeR)
## Warning: package 'edgeR' was built under R version 4.0.3
## Loading required package: limma
## Warning: package 'limma' was built under R version 4.0.3
library(limma)
library(seqinr)
##
## Attaching package: 'seqinr'
## The following object is masked from 'package:limma':
##
## zscore
## The following object is masked from 'package:biomaRt':
##
## getSequence
## The following object is masked from 'package:dplyr':
##
## count
library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
library(RColorBrewer)
library(pheatmap)
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Warning: package 'AnnotationDbi' was built under R version 4.0.3
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.5
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.0.3
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.0.3
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.0.3
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:base':
##
## expand.grid
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
##
library(clusterProfiler)
## Warning: package 'clusterProfiler' was built under R version 4.0.3
##
## clusterProfiler v3.18.1 For help: https://guangchuangyu.github.io/software/clusterProfiler
##
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:biomaRt':
##
## select
## The following object is masked from 'package:stats':
##
## filter
library(ComplexHeatmap)
## Warning: package 'ComplexHeatmap' was built under R version 4.0.3
## ========================================
## ComplexHeatmap version 2.6.2
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). 90% of the arguments
## in the original pheatmap() are identically supported in the new function. You
## can still use the original function by explicitly calling pheatmap::pheatmap().
##
## Attaching package: 'ComplexHeatmap'
## The following object is masked from 'package:pheatmap':
##
## pheatmap
metadata <- read.delim("../SraRunTable.txt",sep = ",")
#metadata <- metadata[,c("GEO_Accession..exp.","source_name","Treatment","Cell_Line","Time_point")]
#metadata <- metadata[order(metadata$Run),]
metadata <- metadata[order(metadata$Sample.Name),]
metadata <- unique(metadata[,c("Sample.Name","source_name","Treatment")])
rownames(metadata) <- seq(1:110)
head(metadata)
## Sample.Name source_name Treatment
## 1 GSM4432378 Mock treated NHBE cells Mock treatment
## 2 GSM4432379 Mock treated NHBE cells Mock treatment
## 3 GSM4432380 Mock treated NHBE cells Mock treatment
## 4 GSM4432381 SARS-CoV-2 infected NHBE cells SARS-CoV-2 infected (MOI 2)
## 5 GSM4432382 SARS-CoV-2 infected NHBE cells SARS-CoV-2 infected (MOI 2)
## 6 GSM4432383 SARS-CoV-2 infected NHBE cells SARS-CoV-2 infected (MOI 2)
data <- read.delim("../Counts_all.tsv")
rownames(data) <- data$X
data <- data[,-1]
head(data)
## Series1_NHBE_Mock_1 Series1_NHBE_Mock_2 Series1_NHBE_Mock_3
## DDX11L1 0 0 0
## WASH7P 29 24 23
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 112 119 113
## Series1_NHBE_SARS.CoV.2_1 Series1_NHBE_SARS.CoV.2_2
## DDX11L1 0 0
## WASH7P 34 19
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 127 84
## Series1_NHBE_SARS.CoV.2_3 Series2_A549_Mock_1 Series2_A549_Mock_2
## DDX11L1 0 0 0
## WASH7P 44 68 43
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 270 11 3
## Series2_A549_Mock_3 Series2_A549_SARS.CoV.2_1
## DDX11L1 0 0
## WASH7P 33 65
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 6 8
## Series2_A549_SARS.CoV.2_2 Series2_A549_SARS.CoV.2_3
## DDX11L1 1 1
## WASH7P 79 48
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 10 10
## Series3_A549_Mock_1 Series3_A549_Mock_2 Series3_A549_RSV_1
## DDX11L1 1 0 0
## WASH7P 184 128 51
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 108 95 37
## Series3_A549_RSV_2 Series4_A549_Mock_1 Series4_A549_Mock_2
## DDX11L1 0 0 0
## WASH7P 43 15 12
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 11 1 5
## Series4_A549_IAV_1 Series4_A549_IAV_2 Series5_A549_Mock_1
## DDX11L1 0 0 0
## WASH7P 3 3 64
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 0 2 7
## Series5_A549_Mock_2 Series5_A549_Mock_3 Series5_A549_SARS.CoV.2_1
## DDX11L1 0 0 0
## WASH7P 53 37 38
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 14 11 1
## Series5_A549_SARS.CoV.2_2 Series5_A549_SARS.CoV.2_3
## DDX11L1 0 0
## WASH7P 47 65
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 13 4
## Series6_A549.ACE2_Mock_1 Series6_A549.ACE2_Mock_2
## DDX11L1 0 0
## WASH7P 37 4
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 13 2
## Series6_A549.ACE2_Mock_3 Series6_A549.ACE2_SARS.CoV.2_1
## DDX11L1 0 0
## WASH7P 22 1
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 18 9
## Series6_A549.ACE2_SARS.CoV.2_2 Series6_A549.ACE2_SARS.CoV.2_3
## DDX11L1 0 0
## WASH7P 2 4
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 2 1
## Series7_Calu3_Mock_1 Series7_Calu3_Mock_2 Series7_Calu3_Mock_3
## DDX11L1 0 0 0
## WASH7P 25 60 84
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 65 184 435
## Series7_Calu3_SARS.CoV.2_1 Series7_Calu3_SARS.CoV.2_2
## DDX11L1 1 0
## WASH7P 47 32
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 271 137
## Series7_Calu3_SARS.CoV.2_3 Series8_A549_Mock_1 Series8_A549_Mock_2
## DDX11L1 0 0 0
## WASH7P 41 68 17
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 265 4 3
## Series8_A549_Mock_3 Series8_A549_RSV_1 Series8_A549_RSV_2
## DDX11L1 0 0 0
## WASH7P 21 18 9
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 5 13 9
## Series8_A549_RSV_3 Series8_A549_HPIV3_3 Series8_A549_HPIV3_2
## DDX11L1 1 0 0
## WASH7P 28 12 23
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 9 3 5
## Series8_A549_HPIV3_1 Series9_NHBE_Mock_1 Series9_NHBE_Mock_2
## DDX11L1 0 0 0
## WASH7P 14 57 58
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 5 58 51
## Series9_NHBE_Mock_3 Series9_NHBE_Mock_4 Series9_NHBE_IAV_1
## DDX11L1 0 0 0
## WASH7P 53 89 102
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 44 93 107
## Series9_NHBE_IAV_2 Series9_NHBE_IAV_3 Series9_NHBE_IAV_4
## DDX11L1 0 0 0
## WASH7P 26 21 7
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 37 30 10
## Series9_NHBE_IAVdNS1_1 Series9_NHBE_IAVdNS1_2 Series9_NHBE_IAVdNS1_3
## DDX11L1 0 0 0
## WASH7P 41 56 36
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 52 52 42
## Series9_NHBE_IAVdNS1_4 Series9_NHBE_IFNB_4h_1 Series9_NHBE_IFNB_4h_2
## DDX11L1 0 0 0
## WASH7P 131 72 66
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 101 94 102
## Series9_NHBE_IFNB_6h_1 Series9_NHBE_IFNB_6h_2 Series9_NHBE_IFNB_12h_1
## DDX11L1 0 0 0
## WASH7P 46 35 48
## FAM138A 0 0 0
## FAM138F 0 0 0
## OR4F5 0 0 0
## LOC729737 52 41 41
## Series9_NHBE_IFNB_12h_2 Series15_HealthyLungBiopsy_2
## DDX11L1 0 0
## WASH7P 46 261
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 60 15
## Series15_HealthyLungBiopsy_1 Series15_COVID19Lung_2
## DDX11L1 0 0
## WASH7P 140 0
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 70 17
## Series15_COVID19Lung_1 Series16_A549.ACE2_Mock_1
## DDX11L1 0 0
## WASH7P 0 0
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 0 1
## Series16_A549.ACE2_Mock_2 Series16_A549.ACE2_Mock_3
## DDX11L1 0 0
## WASH7P 11 7
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 2 2
## Series16_A549.ACE2_SARS.CoV.2_1 Series16_A549.ACE2_SARS.CoV.2_2
## DDX11L1 0 0
## WASH7P 2 6
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 0 0
## Series16_A549.ACE2_SARS.CoV.2_3 Series16_A549.ACE2_SARS.CoV.2_Rux_1
## DDX11L1 0 0
## WASH7P 5 12
## FAM138A 0 0
## FAM138F 0 0
## OR4F5 0 0
## LOC729737 1 4
## Series16_A549.ACE2_SARS.CoV.2_Rux_2
## DDX11L1 0
## WASH7P 6
## FAM138A 0
## FAM138F 0
## OR4F5 0
## LOC729737 0
## Series16_A549.ACE2_SARS.CoV.2_Rux_3
## DDX11L1 0
## WASH7P 8
## FAM138A 0
## FAM138F 0
## OR4F5 0
## LOC729737 2
new.data <- data[,1:6]
new.meta <- metadata[1:6,]
new.meta$source_name <- as.factor(new.meta$source_name)
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
## (Intercept) source_nameSARS-CoV-2 infected NHBE cells
## 1 1 0
## 2 1 0
## 3 1 0
## 4 1 1
## 5 1 1
## 6 1 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=3
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5486076
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T]
##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")
##### VOOM
v.1 <- voom(dge,new.design,plot=TRUE)
##### MDS plot
plotMDS(v.1, main="plotMDS(v)",cex=0.5)
##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.1,new.design)
# calculating the statistics
fit2 <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"
## [2] "source_nameSARS-CoV-2 infected NHBE cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")
##### list top differentially expressed genes
NHBE.sars.name = topTable(fit2, coef="source_nameSARS-CoV-2 infected NHBE cells", number=nrow(dge$counts))
R_gene.names<- NHBE.sars.name[NHBE.sars.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
## logFC AveExpr t P.Value adj.P.Val B
## SAA2 2.417163 4.969914 21.61939 1.876272e-11 2.243646e-07 16.42017
## CCL20 3.139099 4.212050 18.49831 1.301616e-10 7.782365e-07 14.36587
## INHBA 1.824571 6.232337 16.89001 3.993195e-10 9.694929e-07 13.76812
## S100A8 1.896605 6.707539 16.76486 4.375037e-10 9.694929e-07 13.68269
## TNFAIP3 1.618827 7.202701 16.62059 4.864490e-10 9.694929e-07 13.57575
## IL36G 2.750377 3.778832 17.03467 3.595973e-10 9.694929e-07 13.38953
## SAA1 2.247018 7.549586 15.76323 9.301177e-10 1.384014e-06 12.92361
## TNIP1 1.268120 8.310385 15.76837 9.264270e-10 1.384014e-06 12.91639
## KRT6B 1.550960 7.509390 15.61249 1.045847e-09 1.384014e-06 12.80312
## CXCL1 1.427990 6.904826 15.48328 1.157396e-09 1.384014e-06 12.70898
new.data <- data[,7:12]
new.meta <- metadata[7:12,]
new.meta$source_name <- as.factor(new.meta$source_name)
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
## (Intercept) source_nameSARS-CoV-2 infected A549 cells
## 7 1 0
## 8 1 0
## 9 1 0
## 10 1 1
## 11 1 1
## 12 1 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=3
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5603982
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T]
##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")
##### VOOM
v.2 <- voom(dge,new.design,plot=TRUE)
##### MDS plot
plotMDS(v.2, main="plotMDS(v)",cex=0.5)
##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.2,new.design)
# calculating the statistics
fit2 <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"
## [2] "source_nameSARS-CoV-2 infected A549 cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")
##### list top differentially expressed genes
A549.sars.0.2.name = topTable(fit2, coef="source_nameSARS-CoV-2 infected A549 cells", number=nrow(dge$counts))
R_gene.names<- A549.sars.0.2.name[A549.sars.0.2.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
## logFC AveExpr t P.Value adj.P.Val B
## IFI6 4.330863 5.212637 36.55897 4.711729e-15 5.755377e-11 21.85594
## ISG15 3.751729 4.023533 28.03836 1.705803e-13 1.041819e-09 18.71222
## IRF9 2.153629 5.286352 22.80394 2.734200e-12 6.679650e-09 18.05121
## IFIT1 4.283022 4.063577 24.51269 1.039055e-12 3.578297e-09 17.41369
## PARP9 2.067938 5.276501 20.82052 9.201990e-12 1.873372e-08 17.02836
## OAS1 1.542308 6.743258 20.19372 1.381461e-11 2.410650e-08 16.91588
## OAS3 1.381409 8.183566 18.58645 4.143981e-11 6.327341e-08 15.94093
## MX1 5.025758 3.141509 24.29392 1.171772e-12 3.578297e-09 15.84208
## IRF7 3.148346 3.813212 18.36691 4.847793e-11 6.579532e-08 14.88902
## STAT1 1.317132 8.311213 15.66700 3.898759e-10 4.609216e-07 13.74166
dim(A549.sars.0.2.name)
## [1] 12215 6
dim(dge)
## [1] 12215 6
new.data <- data[,13:16]
new.meta <- metadata[13:16,]
new.meta$source_name <- as.factor(new.meta$source_name)
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
## (Intercept) source_nameRSV infected A549 cells
## 13 1 0
## 14 1 0
## 15 1 1
## 16 1 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=2
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.584163
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T]
##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")
##### VOOM
v.3 <- voom(dge,new.design,plot=TRUE)
##### MDS plot
plotMDS(v.3, main="plotMDS(v)",cex=0.5)
##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.3,new.design)
# calculating the statistics
fit2 <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)" "source_nameRSV infected A549 cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")
##### list top differentially expressed genes
A549.RSV.name = topTable(fit2, coef="source_nameRSV infected A549 cells", number=nrow(dge$counts))
R_gene.names<- A549.RSV.name[A549.RSV.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
## logFC AveExpr t P.Value adj.P.Val B
## IFIT1 5.092480 7.242044 25.77272 1.283181e-144 1.633875e-140 323.1601
## MX1 5.564444 5.573895 24.26439 1.345748e-128 8.567703e-125 285.2392
## ISG15 4.536756 7.769077 23.56707 1.678089e-121 7.122371e-118 269.0784
## IFIT3 5.019121 5.975730 23.41241 5.914634e-120 1.882776e-116 265.2462
## OASL 5.527189 5.101092 22.70079 5.820997e-113 1.482375e-109 248.6107
## IFIT2 4.831337 5.167464 20.79999 2.686254e-95 5.700678e-92 207.7003
## IRF7 3.933973 6.821339 20.05523 8.816586e-89 1.603737e-85 192.8414
## HELZ2 3.826506 7.211111 19.81082 1.081018e-86 1.720575e-83 188.0098
## BATF2 4.485574 4.702147 18.41742 2.937129e-75 4.155385e-72 161.2441
## PARP10 4.027628 5.332585 18.24351 6.892530e-74 8.776258e-71 158.2173
dim(A549.RSV.name)
## [1] 12733 6
dim(dge)
## [1] 12733 4
new.data <- data[,17:20]
new.meta <- metadata[17:20,]
new.meta$source_name <- as.factor(new.meta$source_name)
new.meta$source_name <- relevel(new.meta$source_name, "Mock treated A549 cells")
str(new.meta)
## 'data.frame': 4 obs. of 3 variables:
## $ Sample.Name: chr "GSM4432394" "GSM4432395" "GSM4432396" "GSM4432397"
## $ source_name: Factor w/ 2 levels "Mock treated A549 cells",..: 1 1 2 2
## $ Treatment : chr "Mock treatment" "Mock treatment" "IAV infected (MOI 5)" "IAV infected (MOI 5)"
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
## (Intercept) source_nameIAV infected A549 cells
## 17 1 0
## 18 1 0
## 19 1 1
## 20 1 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=2
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5629215
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T]
##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")
##### VOOM
v.4 <- voom(dge,new.design,plot=TRUE)
##### MDS plot
plotMDS(v.4, main="plotMDS(v)",cex=0.5)
##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.4,new.design)
# calculating the statistics
fit2 <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)" "source_nameIAV infected A549 cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")
##### list top differentially expressed genes
A549.IAV.name = topTable(fit2, coef="source_nameIAV infected A549 cells", number=nrow(dge$counts))
R_gene.names<- A549.RSV.name[A549.IAV.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
## [1] logFC AveExpr t P.Value adj.P.Val B
## <0 rows> (or 0-length row.names)
new.data <- data[,21:26]
new.meta <- metadata[21:26,]
new.meta$source_name <- as.factor(new.meta$source_name)
str(new.meta)
## 'data.frame': 6 obs. of 3 variables:
## $ Sample.Name: chr "GSM4462336" "GSM4462337" "GSM4462338" "GSM4462339" ...
## $ source_name: Factor w/ 2 levels "Mock treated A549 cells",..: 1 1 1 2 2 2
## $ Treatment : chr "Mock treatment" "Mock treatment" "Mock treatment" "SARS-CoV-2 infected (MOI 2)" ...
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
## (Intercept) source_nameSARS-CoV-2 infected A549 cells
## 21 1 0
## 22 1 0
## 23 1 0
## 24 1 1
## 25 1 1
## 26 1 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=3
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5620957
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T]
##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")
##### VOOM
v.5 <- voom(dge,new.design,plot=TRUE)
##### MDS plot
plotMDS(v.5, main="plotMDS(v)",cex=0.5)
##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.5,new.design)
# calculating the statistics
fit2 <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"
## [2] "source_nameSARS-CoV-2 infected A549 cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")
##### list top differentially expressed genes
A549.sars.2.name = topTable(fit2, coef="source_nameSARS-CoV-2 infected A549 cells", number=nrow(dge$counts))
R_gene.names <- A549.sars.2.name[A549.sars.2.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
## logFC AveExpr t P.Value adj.P.Val B
## STC2 4.020526 6.428324 41.57514 1.343871e-14 1.646511e-10 23.33235
## GPX2 -2.773737 8.287283 -35.43363 9.442482e-14 4.542013e-10 21.95389
## C15orf48 2.757710 6.531311 34.11016 1.501073e-13 4.542013e-10 21.41921
## FASN -2.252676 8.106420 -33.70373 1.736858e-13 4.542013e-10 21.39276
## LAMC2 3.702878 7.288538 33.52408 1.853580e-13 4.542013e-10 21.25640
## MCM5 -2.384744 6.522076 -32.68780 2.520478e-13 5.146816e-10 20.99003
## PEG10 -2.331746 6.934755 -31.73875 3.606314e-13 5.363177e-10 20.68207
## TK1 -2.224042 6.762880 -31.65487 3.724192e-13 5.363177e-10 20.64664
## KRT8 -2.225048 10.699966 -30.77140 5.252867e-13 5.363177e-10 20.35035
## ICAM1 2.792513 6.189426 30.92044 4.953453e-13 5.363177e-10 20.29775
new.data <- data[,27:32]
new.meta <- metadata[27:32,]
new.meta$source_name <- as.factor(new.meta$source_name)
str(new.meta)
## 'data.frame': 6 obs. of 3 variables:
## $ Sample.Name: chr "GSM4462342" "GSM4462343" "GSM4462344" "GSM4462345" ...
## $ source_name: Factor w/ 2 levels "Mock treated A549 cells trasnduced with a vector expressing human ACE2",..: 1 1 1 2 2 2
## $ Treatment : chr "Mock treatment" "Mock treatment" "Mock treatment" "SARS-CoV-2 infected (MOI 0.2)" ...
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
## (Intercept)
## 27 1
## 28 1
## 29 1
## 30 1
## 31 1
## 32 1
## source_nameSARS-CoV-2 infected A549 cells trasnduced with a vector expressing human ACE2
## 27 0
## 28 0
## 29 0
## 30 1
## 31 1
## 32 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=3
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5582878
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T]
##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")
##### VOOM
v.6 <- voom(dge,new.design,plot=TRUE)
##### MDS plot
plotMDS(v.6, main="plotMDS(v)",cex=0.5)
##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.6,new.design)
# calculating the statistics
fit2 <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"
## [2] "source_nameSARS-CoV-2 infected A549 cells trasnduced with a vector expressing human ACE2"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")
##### list top differentially expressed genes
A549.ACE2.0.2.sars.name = topTable(fit2, coef="source_nameSARS-CoV-2 infected A549 cells trasnduced with a vector expressing human ACE2", number=nrow(dge$counts))
R_gene.names <- A549.ACE2.0.2.sars.name[A549.ACE2.0.2.sars.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
## logFC AveExpr t P.Value adj.P.Val B
## JUNB 3.523119 7.140814 33.96592 1.407961e-25 1.713348e-21 48.22323
## NFKBIA 3.270176 8.573796 31.08024 1.900859e-24 7.782733e-21 45.86466
## CCNL1 2.845292 7.692370 30.09558 4.868825e-24 1.481218e-20 44.89106
## TNFAIP3 3.458063 6.466889 28.85271 1.664489e-23 3.538519e-20 43.52932
## CXCL2 6.886796 5.109736 31.07033 1.918662e-24 7.782733e-21 43.49306
## ARRDC3 3.506089 6.122677 28.80610 1.744688e-23 3.538519e-20 43.39858
## DDIT3 3.580606 5.877895 28.40105 2.634018e-23 4.579052e-20 42.93331
## PPP1R15A 3.227209 8.065816 27.92096 4.322787e-23 6.575499e-20 42.76773
## IER5 2.330592 7.899642 25.51727 5.850825e-22 7.910966e-19 40.18662
## BBC3 2.720648 7.599000 24.81725 1.303591e-21 1.586340e-18 39.38640
new.data <- data[,33:38]
new.meta <- metadata[33:38,]
new.meta$source_name <- as.factor(new.meta$source_name)
str(new.meta)
## 'data.frame': 6 obs. of 3 variables:
## $ Sample.Name: chr "GSM4462348" "GSM4462349" "GSM4462350" "GSM4462351" ...
## $ source_name: Factor w/ 2 levels "Mock treated Calu-3 cells",..: 1 1 1 2 2 2
## $ Treatment : chr "Mock treatment" "Mock treatment" "Mock treatment" "SARS-CoV-2 infected (MOI 2)" ...
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
## (Intercept) source_nameSARS-CoV-2 infected Calu-3 cells
## 33 1 0
## 34 1 0
## 35 1 0
## 36 1 1
## 37 1 1
## 38 1 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=3
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5648025
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T]
##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")
##### VOOM
v.7 <- voom(dge,new.design,plot=TRUE)
##### MDS plot
plotMDS(v.7, main="plotMDS(v)",cex=0.5)
##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.7,new.design)
# calculating the statistics
fit2 <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"
## [2] "source_nameSARS-CoV-2 infected Calu-3 cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")
##### list top differentially expressed genes
calu.sars.name = topTable(fit2, coef="source_nameSARS-CoV-2 infected Calu-3 cells", number=nrow(dge$counts))
R_gene.names <- calu.sars.name[calu.sars.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
## logFC AveExpr t P.Value adj.P.Val B
## OAS2 4.866901 7.523400 45.13101 1.878023e-17 1.244333e-13 30.12248
## IFIT2 5.342672 8.193275 44.90876 2.021498e-17 1.244333e-13 30.11478
## IFIT1 4.537755 7.548068 42.63578 4.384391e-17 1.496508e-13 29.37067
## IFIT3 4.926069 7.053995 41.98359 5.516330e-17 1.496508e-13 29.08442
## RSAD2 5.120329 6.094588 41.71122 6.077931e-17 1.496508e-13 28.78729
## IL1A 5.238155 6.887289 40.32193 1.006530e-16 2.065232e-13 28.49478
## PPP1R15A 3.904741 8.292272 37.59917 2.848085e-16 5.008968e-13 27.67445
## MX1 3.872235 7.747760 36.96448 3.668257e-16 5.017769e-13 27.41033
## TXNIP 3.426616 8.950076 36.16208 5.082591e-16 5.759155e-13 27.13415
## ICAM1 3.668712 9.581654 35.57920 6.470044e-16 5.759155e-13 26.90578
new.data <- data[,39:47]
new.meta <- metadata[39:47,]
new.meta$source_name <- as.factor(new.meta$source_name)
str(new.meta)
## 'data.frame': 9 obs. of 3 variables:
## $ Sample.Name: chr "GSM4462354" "GSM4462355" "GSM4462356" "GSM4462357" ...
## $ source_name: Factor w/ 3 levels "HPIV3 infected A549 cells",..: 2 2 2 3 3 3 1 1 1
## $ Treatment : chr "Mock treatment" "Mock treatment" "Mock treatment" "RSV infected (MOI 2)" ...
new.meta$source_name <- relevel(new.meta$source_name,"Mock treated A549 cells")
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
## (Intercept) source_nameHPIV3 infected A549 cells
## 39 1 0
## 40 1 0
## 41 1 0
## 42 1 0
## 43 1 0
## 44 1 0
## 45 1 1
## 46 1 1
## 47 1 1
## source_nameRSV infected A549 cells
## 39 0
## 40 0
## 41 0
## 42 1
## 43 1
## 44 1
## 45 0
## 46 0
## 47 0
## attr(,"assign")
## [1] 0 1 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=5
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5437446
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T]
##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")
##### VOOM
v.8 <- voom(dge,new.design,plot=TRUE)
##### MDS plot
plotMDS(v.8, main="plotMDS(v)",cex=0.5)
##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.8,new.design)
# calculating the statistics
fit2 <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"
## [2] "source_nameHPIV3 infected A549 cells"
## [3] "source_nameRSV infected A549 cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")
##### list top differentially expressed genes
A549.RSV.2.name = topTable(fit2, coef="source_nameRSV infected A549 cells", number=nrow(dge$counts))
A549.HPIV3.3.name = topTable(fit2, coef="source_nameHPIV3 infected A549 cells", number=nrow(dge$counts))
R_gene.names <- A549.RSV.2.name[A549.RSV.2.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
## logFC AveExpr t P.Value adj.P.Val B
## IFIT3 6.009547 6.850910 30.19556 5.715170e-10 2.606897e-06 12.68200
## MX1 6.577520 6.385205 30.35499 5.466270e-10 2.606897e-06 12.51551
## IFIT2 7.342778 7.032180 29.68654 6.598625e-10 2.606897e-06 12.38024
## GBP4 6.979492 3.045794 27.25794 1.356882e-09 4.020440e-06 11.45091
## LAMC2 4.578369 7.313385 21.59082 9.648150e-09 1.270215e-05 10.68643
## OASL 7.970537 5.874429 22.63680 6.485822e-09 1.270215e-05 10.64700
## HERC5 4.390229 6.311126 21.01905 1.208309e-08 1.270215e-05 10.46532
## GBP1 6.142683 5.069691 21.67427 9.340912e-09 1.270215e-05 10.45643
## GLIPR1 3.694440 5.701637 20.86318 1.286077e-08 1.270215e-05 10.44888
## IFI44 7.954380 2.885669 21.95598 8.381613e-09 1.270215e-05 10.28142
new.data <- data[,48:65]
new.meta <- metadata[48:65,]
new.meta$source_name <- as.factor(new.meta$source_name)
new.meta$source_name <- relevel(new.meta$source_name,"Mock treated NHBE cells")
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
## (Intercept) source_namehuman IFNB treated NHBE cells
## 48 1 0
## 49 1 0
## 50 1 0
## 51 1 0
## 52 1 0
## 53 1 0
## 54 1 0
## 55 1 0
## 56 1 0
## 57 1 0
## 58 1 0
## 59 1 0
## 60 1 1
## 61 1 1
## 62 1 1
## 63 1 1
## 64 1 1
## 65 1 1
## source_nameIAV infected NHBE cells source_nameIAVdNS1 infected NHBE cells
## 48 0 0
## 49 0 0
## 50 0 0
## 51 0 0
## 52 1 0
## 53 1 0
## 54 1 0
## 55 1 0
## 56 0 1
## 57 0 1
## 58 0 1
## 59 0 1
## 60 0 0
## 61 0 0
## 62 0 0
## 63 0 0
## 64 0 0
## 65 0 0
## attr(,"assign")
## [1] 0 1 1 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=9
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5401202
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T]
##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")
##### VOOM
v.9 <- voom(dge,new.design,plot=TRUE)
##### MDS plot
plotMDS(v.9, main="plotMDS(v)",cex=0.5)
##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.9,new.design)
# calculating the statistics
fit2 <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"
## [2] "source_namehuman IFNB treated NHBE cells"
## [3] "source_nameIAV infected NHBE cells"
## [4] "source_nameIAVdNS1 infected NHBE cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")
##### list top differentially expressed genes
NHBE.IAV.name = topTable(fit2, coef="source_nameIAV infected NHBE cells", number=nrow(dge$counts))
NHBE.IAV.dNS1.name = topTable(fit2, coef="source_nameIAVdNS1 infected NHBE cells", number=nrow(dge$counts))
NHBE.IFNB.name = topTable(fit2, coef="source_namehuman IFNB treated NHBE cells", number=nrow(dge$counts))
R_gene.names <- NHBE.IAV.name[NHBE.IAV.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
## logFC AveExpr t P.Value adj.P.Val B
## MIR22HG 1.0223077 6.389417 8.582620 3.740674e-08 0.0004403895 8.963597
## PDIA6 0.7921531 8.137763 7.591399 2.524154e-07 0.0011274978 7.129707
## HELZ2 2.1917029 7.524074 7.383454 3.830792e-07 0.0011274978 6.720001
## IRF9 1.3456132 6.044571 7.262176 4.899157e-07 0.0011535554 6.478208
## JUND -1.6703224 5.737263 -7.076829 7.162305e-07 0.0012789148 6.122546
## UPK3B -2.0990846 1.723997 -7.434852 3.453607e-07 0.0011274978 5.962404
## MAL -3.3926298 3.883394 -6.931523 9.677339e-07 0.0012789148 5.786257
## MX1 4.4436771 7.610313 6.926610 9.776806e-07 0.0012789148 5.703800
## ECM1 -1.2508931 7.376249 -6.820097 1.221280e-06 0.0013071022 5.596559
## EPN3 -1.1193144 4.644277 -6.772787 1.348775e-06 0.0013232609 5.514740
new.data <- data[,71:76]
new.meta <- metadata[102:107,]
new.meta$source_name <- as.factor(new.meta$source_name)
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
## (Intercept)
## 102 1
## 103 1
## 104 1
## 105 1
## 106 1
## 107 1
## source_nameSARS-CoV-2 infected A549 cells trasnduced with a vector expressing human ACE2
## 102 0
## 103 0
## 104 0
## 105 1
## 106 1
## 107 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=3
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5828784
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T]
##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")
##### VOOM
v.16 <- voom(dge,new.design,plot=TRUE)
##### MDS plot
plotMDS(v.16, main="plotMDS(v)",cex=0.5)
##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.16,new.design)
# calculating the statistics
fit2 <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"
## [2] "source_nameSARS-CoV-2 infected A549 cells trasnduced with a vector expressing human ACE2"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")
##### list top differentially expressed genes
A549.ACE2.sars.name = topTable(fit2, coef="source_nameSARS-CoV-2 infected A549 cells trasnduced with a vector expressing human ACE2", number=nrow(dge$counts))
R_gene.names <- A549.ACE2.sars.name[A549.ACE2.sars.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
## [1] logFC AveExpr t P.Value adj.P.Val B
## <0 rows> (or 0-length row.names)
##### Heatmap df id
df.1 <- A549.ACE2.sars.name %>% mutate(gene_id = rownames(A549.ACE2.sars.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.2 <- A549.RSV.name %>% mutate(gene_id = rownames(A549.RSV.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.3 <- A549.IAV.name %>% mutate(gene_id = rownames(A549.IAV.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.4 <- A549.ACE2.0.2.sars.name %>% mutate(gene_id = rownames(A549.ACE2.0.2.sars.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.5 <- A549.ACE2.sars.name %>% mutate(gene_id = rownames(A549.ACE2.sars.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.6 <- calu.sars.name %>% mutate(gene_id = rownames(calu.sars.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.7 <- A549.RSV.2.name %>% mutate(gene_id = rownames(A549.RSV.2.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.8 <- A549.HPIV3.3.name %>% mutate(gene_id = rownames(A549.HPIV3.3.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
## 05.17
df.9 <- NHBE.sars.name %>% mutate(gene_id = rownames(NHBE.sars.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.id <- unique(c(df.1$gene_id,df.2$gene_id,df.3$gene_id,df.4$gene_id,df.5$gene_id,df.6$gene_id,df.7$gene_id,df.8$gene_id,df.9$gene_id))
A549.ACE2.sars.expr <- rowSums(v.16$E[,4:6])/3
A549.RSV.expr <- rowSums(v.3$E[,3:4])/2
A549.IAV.expr <- rowSums(v.4$E[,3:4])/2
A549.sars.2.expr <- rowSums(v.5$E[,4:6])/3
A549.ACE2.0.2.sars.expr <- rowSums(v.6$E[,4:6])/3
calu.sars.expr <- rowSums(v.7$E[,4:6])/3
A549.RSV.2.expr <- rowSums(v.8$E[,4:6])/3
A549.HPIV3.3.expr <- rowSums(v.8$E[,7:9])/3
A549.ACE2.sars.expr <- A549.ACE2.sars.name$logFC
names(A549.ACE2.sars.expr) <- rownames(A549.ACE2.sars.name)
A549.RSV.expr <- A549.RSV.name$logFC
names(A549.RSV.expr) <- rownames(A549.RSV.name)
A549.IAV.expr <- A549.IAV.name$logFC
names(A549.IAV.expr) <- rownames(A549.IAV.name)
A549.sars.2.expr <- A549.sars.2.name$logFC
names(A549.sars.2.expr) <- rownames(A549.sars.2.name)
A549.ACE2.0.2.sars.expr <- A549.ACE2.0.2.sars.name$logFC
names(A549.ACE2.0.2.sars.expr) <- rownames(A549.ACE2.0.2.sars.name)
calu.sars.expr <- calu.sars.name$logFC
names(calu.sars.expr) <- rownames(calu.sars.name)
A549.RSV.2.expr <- A549.RSV.2.name$logFC
names(A549.RSV.2.expr) <- rownames(A549.RSV.2.name)
A549.HPIV3.3.expr <- A549.HPIV3.3.name$logFC
names(A549.HPIV3.3.expr) <- rownames(A549.HPIV3.3.name)
## 05.16
NHBE.sars.expr <- NHBE.sars.name$logFC
names(NHBE.sars.expr) <- rownames(NHBE.sars.name)
expr.df.ind <- unique(c(rownames(A549.ACE2.sars.name),rownames(A549.RSV.name),rownames(A549.IAV.name),rownames(A549.sars.2.name),rownames(A549.ACE2.0.2.sars.name),rownames(calu.sars.name),rownames(A549.RSV.2.name),rownames(A549.HPIV3.3.name)))
expr <- as.data.frame(matrix(0,nrow=length(expr.df.ind),ncol = 1))
rownames(expr) <- expr.df.ind
expr$A549.ACE2_sars <- A549.ACE2.sars.expr[row.names(expr)]
expr$A549_RSV_15 <- A549.RSV.expr[row.names(expr)]
expr$A549_IAV <- A549.IAV.expr[row.names(expr)]
expr$A549_sars_2 <- A549.sars.2.expr[row.names(expr)]
expr$A549.ACE2_sars_0.2 <- A549.ACE2.0.2.sars.expr[row.names(expr)]
expr$calu.3_sars <- calu.sars.expr[row.names(expr)]
expr$A549_RSV_2 <- A549.RSV.2.expr[row.names(expr)]
expr$A549_HPIV3 <- A549.HPIV3.3.expr[row.names(expr)]
## 05.16
expr$NHBE.sars.expr <- NHBE.sars.expr[rownames(expr)]
expr <- expr[,-1]
expr[is.na(expr)] <- 0
head(expr)
## A549.ACE2_sars A549_RSV_15 A549_IAV A549_sars_2 A549.ACE2_sars_0.2
## PYROXD1 1.4445284 -0.19520144 0.54641768 1.32184913 0.5000972
## LOC646903 2.0447325 -0.35385533 0.93490427 -0.55711867 1.7906765
## COL12A1 -0.9698611 0.17288559 0.01182408 1.81695058 0.2521718
## MIR210HG 0.9604034 -0.07048356 -3.03185845 -0.08735310 -1.6524088
## ELOVL2 -1.1202871 -0.89029270 0.72595020 0.01463756 -0.3520592
## ST8SIA4 -1.1714517 -0.49092927 -0.19257813 -1.30514808 0.2583169
## calu.3_sars A549_RSV_2 A549_HPIV3 NHBE.sars.expr
## PYROXD1 1.5127424 0.5006636 0.3833402 -0.04122459
## LOC646903 -0.3175119 0.0000000 0.0000000 0.00000000
## COL12A1 -0.4787169 1.2087476 1.0735949 0.36340299
## MIR210HG -1.2358923 0.8117790 -1.8625108 0.19876360
## ELOVL2 0.0000000 0.6855460 0.7769844 0.00000000
## ST8SIA4 0.0000000 -0.1548775 1.0237359 0.00000000
#kk <- rowSums(expr)
#kk.ind <- expr[kk == "0"]
#expr[kk == "0"]
#dexpr <- expr[-kk.ind,]
#dexpr
biomart download GO term ruls: https://support.bioconductor.org/p/122315/
#library(biomaRt)
#uses human ensembl annotations
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
#gets gene symbol, transcript_id and go_id for all genes annotated with GO:0007507
cytokine <- getBM(attributes=c('hgnc_symbol', 'external_gene_name', 'go_id', 'name_1006'),
filters = 'go', values = c('GO:0034097'), mart = ensembl)
innate.immune <- getBM(attributes=c('hgnc_symbol', 'external_gene_name', 'go_id', 'name_1006'),
filters = 'go', values = 'GO:0045087', mart = ensembl)
virus <- getBM(attributes=c('hgnc_symbol', 'external_gene_name', 'go_id', 'name_1006'),
filters = 'go', values = 'GO:0009615', mart = ensembl)
inflammatory <- getBM(attributes=c('hgnc_symbol', 'external_gene_name', 'go_id', 'name_1006'),
filters = 'go', values = 'GO:0006954', mart = ensembl)
type.I <- getBM(attributes=c('hgnc_symbol', 'external_gene_name', 'go_id', 'name_1006'),
filters = 'go',values = c('GO:0034340'),mart = ensembl)
type.III <- getBM(attributes=c('hgnc_symbol', 'external_gene_name', 'go_id', 'name_1006'),
filters = 'go',values = c('GO:0034342'),mart = ensembl)
#b <- listFilters(mart = ensembl)
#a <- listAttributes(mart = ensembl)
#a[a$name == "go",]
#b[b$name == "name",]
cytokine <- cytokine[cytokine$go_id == 'GO:0034097',"external_gene_name"]
innate.immune <- innate.immune[innate.immune$go_id == 'GO:0045087',"external_gene_name"]
virus <- virus[virus$go_id == 'GO:0009615',"external_gene_name"]
inflammatory <- inflammatory[inflammatory$go_id == 'GO:0006954',"external_gene_name"]
type.I.III <- c(type.I[type.I$go_id == 'GO:0034340',"external_gene_name"], type.III[type.III$go_id == 'GO:0034342',"external_gene_name"])
anno.id <- unique(c(cytokine,innate.immune,virus,inflammatory,type.I.III))
#length(df.id)
#anna <- as.data.frame(matrix(0,nrow=length(df.id),ncol=5))
#rownames(anna) <- df.id
#anna$V6 <- seq(length(df.id))
dexpr = expr
ind.anno.df = rownames(dexpr[rownames(dexpr) %in% anno.id, ])
anna <- as.data.frame(matrix(0,nrow=length(ind.anno.df),ncol=5))
rownames(anna) <- ind.anno.df
anna$V6 <- seq(length(ind.anno.df))
cytokine.ind <- anna$V6[rownames(anna) %in% cytokine]
innate.immune.ind <- anna$V6[rownames(anna) %in% innate.immune]
virus.ind <- anna$V6[rownames(anna) %in% virus]
inflammatory.ind <- anna$V6[rownames(anna) %in% inflammatory]
type.I.III.ind <- anna$V6[rownames(anna) %in% type.I.III]
anna$V1[type.I.III.ind] <- 1
anna$V2[cytokine.ind] <- 1
anna$V3[innate.immune.ind] <- 1
anna$V4[virus.ind] <- 1
anna$V5[inflammatory.ind] <- 1
anna <- anna[,1:5]
anna.ind <- anna[rowSums(anna) != 0,]
anna <- anna.ind
anna.ind <- rownames(anna)
colnames(anna) <- c("IFN I & III","cytokine","innate.immune","response to virus","inflammatory")
anna$cytokine <- as.factor(anna$cytokine)
anna$innate.immune <- as.factor(anna$innate.immune)
anna$`response to virus` <- as.factor(anna$`response to virus`)
anna$inflammatory <- as.factor(anna$inflammatory)
anna$`IFN I & III` <- as.factor(anna$`IFN I & III`)
#anna <- t(anna)
#anna <- as.data.frame(anna)
#anna
#length(anna)
#anna = HeatmapAnnotation(df = anna)
anna_hh = HeatmapAnnotation(df = anna, col = list(`IFN I & III` = c("0" = "white", "1" = "pink"),
cytokine = c("0" = "white", "1" = "blue"),
innate.immune = c("0" = "white", "1" = "red"),
`response to virus` = c("0" = "white", "1" = "green"),
inflammatory = c("0" = "white", "1" = "yellow")),
which = "column",
show_legend = F)
col.concent = brewer.pal(10, "RdBu")
dexpr = expr
dexpr = dexpr[rownames(dexpr) %in% df.id, ]
dexpr = as.matrix(dexpr)
dexpr = t(dexpr)
#png("heatmap_GO.png", width=4000,height=800, res=315, pointsize=12)
png("heatmap_0507.png", width=4000,height=800, res=315, pointsize=12)
pheatmap(dexpr, cluster_rows=T, cluster_cols=T, main="FDR < 0.5% & >4-fold",
scale = "column",
gaps_col = gaps_col,
clustering_distance_cols = "correlation",
cellwidth = 0.5,
cellheight = 10,
# cutree_cols = 2,
treeheight_col = 5,
treeheight_row = 5,
width = 100,
border_color = FALSE, legend = TRUE, legend_labels = "up-/down- regulated",height=20,
show_colnames = F,
color=colorRampPalette(col.concent)(500))
dev.off()
dexpr = expr
#dexpr = dexpr[rownames(dexpr) %in% anna.ind, ]
dexpr = dexpr[rownames(dexpr) %in% ind.anno.df, ]
dexpr = as.matrix(dexpr)
dexpr = t(dexpr)
png("heatmap_0516_2_author_complex.png", width=4000,height=1000, res=315, pointsize=12)
Heatmap(dexpr,bottom_annotation = anna_hh, cluster_columns = T, cluster_rows = T, show_row_names = T,show_column_names = F,
clustering_distance_columns = "pearson",
clustering_distance_row = "pearson",
column_dend_height = unit(0.5,"cm"),row_dend_width = unit(0.5,"cm"))
dev.off()
## quartz_off_screen
## 2
#Heatmap(dexpr,bottom_annotation = anna_hh, cluster_columns = T, cluster_rows = T, show_row_names = T,show_column_names = F,clustering_distance_columns = "pearson",column_dend_height = unit(0.5,"cm"),row_dend_width = unit(0.5,"cm"))
#dev.off()